CREATE TABLE IF NOT EXISTS employees12 (
name STRING COMMENT 'Employee name',
salary FLOAT COMMENT 'Employee salary',
FexTax FLOAT,
StateTax FLOAT,
Insurance FLOAT,
address STRING)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY ','
LINES TERMINATED BY '\n';
LOAD DATA LOCAL INPATH '/empdata-comma.txt'
OVERWRITE INTO TABLE employees12;
The LOCAL keyword:
CREATE TABLE IF NOT EXISTS employees11 (
name STRING COMMENT 'Employee name',
salary FLOAT COMMENT 'Employee salary',
FedTax FLOAT,
StateTax FLOAT,
Insurance FLOAT,
address STRING)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\t'
LINES TERMINATED BY '\n';
LOAD DATA LOCAL INPATH '/empdata-tab.txt'
OVERWRITE INTO TABLE employees11;
CREATE TABLE IF NOT EXISTS employees20 (
name STRING COMMENT 'Employee name',
salary FLOAT COMMENT 'Employee salary',
subordinates ARRAY<STRING> COMMENT 'Names of subordinates',
deductions MAP<STRING, FLOAT>
COMMENT 'Keys are deductions names, values are percentages',
address STRUCT<street:STRING, city:STRING, state:STRING, ZIP:INT>
COMMENT 'Home address')
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '^'
COLLECTION ITEMS TERMINATED BY '#'
MAP KEYS TERMINATED BY '@'
LINES TERMINATED BY '\n'
STORED AS TEXTFILE;
LOAD DATA LOCAL INPATH '/empdata.txt'
OVERWRITE INTO TABLE employees20;
CREATE TABLE IF NOT EXISTS employees23 (
name STRING COMMENT 'Employee name',
salary FLOAT COMMENT 'Employee salary',
subordinates ARRAY<STRING> COMMENT 'Names of subordinates',
deductions MAP<STRING, FLOAT>
COMMENT 'Keys are deductions names, values are percentages',
address STRUCT<street:STRING, city:STRING, state:STRING, ZIP:INT>
COMMENT 'Home address')
PARTITIONED BY (country STRING, state STRING)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '^'
COLLECTION ITEMS TERMINATED BY '#'
MAP KEYS TERMINATED BY '@'
LINES TERMINATED BY '\n'
STORED AS TEXTFILE;
LOAD DATA LOCAL INPATH '/empdata-us-il.txt'
OVERWRITE INTO TABLE employees23
PARTITION (country = 'US', state = 'IL');
Select * from employees23;
LOAD DATA LOCAL INPATH '/empdata-us-ny.txt'
OVERWRITE INTO TABLE employees23
PARTITION (country = 'US', state = 'NY');
LOAD DATA LOCAL INPATH '/empdata-us-ca.txt'
OVERWRITE INTO TABLE employees23
PARTITION (country = 'US', state = 'CA');
See the whole table with three partitions:
Select * from employees23;
Select * from employees23 where state="NY";
Select * from employees23 where address.state="NY";
Select name, salary, address.city, address.state
from employees23
where salary > 120000;
Select name, salary, address.city, address.state
from employees23
where salary > 120000 and (state = 'CA' or state = 'IL');
Those of the employees23 table:
.../employees23/country=US/state=CA
.../employees23/country=US/state=IL
.../employees23/country=US/state=NY
The location of the loaded file and its contents:
CREATE TABLE inserttbl like employees20;
INSERT OVERWRITE TABLE inserttbl
SELECT * FROM employees20
WHERE salary > 100000.0;
SELECT * FROM inserttbl;
OVERWRITE, any previous contents of the partition (or whole table if not partitioned) are replacedINSERT OVERWRITE TABLE employees
PARTITION (country = 'US', state = 'OR')
SELECT * FROM staged_employees se
WHERE se.cnty = 'US' AND se.st = 'OR';
OVERWRITE or with INTO, Hive appends the data rather than replaces itINSERT INTO TABLE employees
PARTITION (country = 'US', state = 'OR')
SELECT * FROM staged_employees se
WHERE se.cnty = 'US' AND se.st = 'OR';
FROM staged_employees se
INSERT OVERWRITE TABLE employees
PARTITION (country = 'US', state = 'OR')
SELECT * WHERE se.cnty = 'US' AND se.st = 'OR'
INSERT OVERWRITE TABLE employees
PARTITION (country = 'US', state = 'CA')
SELECT * WHERE se.cnty = 'US' AND se.st = 'CA'
INSERT OVERWRITE TABLE employees
PARTITION (country = 'US', state = 'IL')
SELECT * WHERE se.cnty = 'US' AND se.st = 'IL';
Hive’s dynamic partition can infer the partitions to create based on query parameters:
INSERT OVERWRITE TABLE employees
PARTITION (country, state)
SELECT ..., se.cnty, se.st
FROM staged_employees se;
In this example, Hive determines the values of the partition keys, country and state, from the last two columns in the SELECT clause
Different names in staged_employees emphasize that the relationship between the source column values and the output partition values is by position only and not by matching on names
Static value for the country (US) and a dynamic value for the state. The static partition keys must come before the dynamic partition keys
INSERT OVERWRITE TABLE employees
PARTITION (country = 'US', state)
SELECT ..., se.cnty, se.st
FROM staged_employees se
WHERE se.cnty = 'US';
| Name | Default | Description |
|---|---|---|
hive.exec.dynamic.partition |
false |
Set to true to enable dynamic partitioning |
hive.exec.dynamic.partition.mode |
strict |
Set to nonstrict to enable all partitions to be determined dynamically |
hive.exec.max.dynamic.partitions.pernode |
100 |
The maximum number of dynamic partitions that can be created by each mapper or reducer. Raises a fatal error if one mapper or reducer attempts to create more than the threshold |
hive.exec.max.dynamic.partitions |
+1000 |
The total number of dynamic partitions that can be created by one statement with dynamic partitioning. Raises a fatal error if the limit is exceeded |
hive.exec.max.created.files |
100000 |
The maximum total number of files that can be created globally. A Hadoop counter is used to track the number of files created. Raises a fatal error if the limit is exceeded |
hive> set hive.exec.dynamic.partition=true;
hive> set hive.exec.dynamic.partition.mode=nonstrict;
hive> set hive.exec.max.dynamic.partitions.pernode=1000;
hive> INSERT OVERWRITE TABLE employees
> PARTITION (country, state)
> SELECT ..., se.cty, se.st
> FROM staged_employees se;
CREATE TABLE ca_employees
AS SELECT name, salary, address
FROM employees
WHERE state = 'CA';
INSERT OVERWRITE LOCAL DIRECTORY '/tmp/ca_employees'
SELECT name, salary, address
FROM employees
WHERE state = 'CA';
Independent of how the data is actually stored in the source table, it is written to files with all fields serialized as strings
hive> ! ls /tmp/ca_employees;
000000_0
hive> ! cat /tmp/ca_employees/000000_0
John Doe100000.0201 San Antonio CircleMountain ViewCA94040
Mary Smith80000.01 Infinity LoopCupertinoCA95014
...